# Download data----------------------------------------------------------
# Create the data folder if it does not exist
if (!dir.exists("data")) {
  dir.create("data")
}

# Base url the data files live under, and the page that lists them
url2 <- "https://aqs.epa.gov/aqsweb/airdata"
page_con <- url("https://aqs.epa.gov/aqsweb/airdata/download_files.html", "rb")

# read_html() leaves a connection that the caller opened in the open state,
# so close it explicitly (also when parsing fails) instead of leaking it.
page <- tryCatch(read_html(page_con), finally = close(page_con))

# Get all the zip file names linked from the page
filenames <- page %>%
  html_nodes("a") %>%       # find all links
  html_attr("href") %>%     # get the link target
  str_subset("\\.zip")      # keep the targets that contain ".zip"

# Pattern for the requested years; printed for inspection.
# NOTE(review): number_range(), fromyr and thruyr are defined elsewhere --
# presumably this yields a regex matching every year in the range; confirm.
(yrrange <- number_range(fromyr, thruyr))

# Keep the files for the years fromyr - thruyr
filenames <- filenames[grepl(yrrange, filenames)]

# Keep only the daily AQI by county files
filenames <- filenames[grepl("daily_aqi_by_county", filenames)]

# Without this guard an empty selection would silently turn into a single
# bogus url (the bare directory) and be downloaded further down.
if (length(filenames) == 0) {
  stop("No daily_aqi_by_county zip files found for the requested years",
       call. = FALSE)
}

# Full download urls for the AQI data
url <- paste0(url2, "/", base::basename(filenames))

# Create the folder that stores the downloaded files if it does not exist.
# recursive = TRUE because the intermediate folders ("data/raw", "data/raw/aqs")
# are not created anywhere earlier in this section.
if (!dir.exists("data/raw/aqs/aqi")) {
  dir.create("data/raw/aqs/aqi", recursive = TRUE)
}

# Local destination for every url, keeping the remote file name
filepath <- paste0("data/raw/aqs/aqi/", base::basename(url))

# Download each url to its destination; mode = "wb" keeps the zip archives
# intact on every platform.
purrr::walk2(
  .x = url,
  .y = filepath,
  .f = function(src, dest) download.file(src, dest, mode = "wb")
)


# Read every downloaded AQI archive and combine them into one table ---------

# Only pick up the zip archives: the combined Stata file written at the end of
# this section lands in the same folder and must not be passed to unzip() when
# the script is run again.
filenames_daily <- list.files("data/raw/aqs/aqi", pattern = "\\.zip$")

if (length(filenames_daily) == 0) {
  stop("No zip archives found in data/raw/aqs/aqi", call. = FALSE)
}

# Full paths of the archives (wd is defined elsewhere in the script)
dir_daily <- file.path(wd, "data/raw/aqs/aqi", filenames_daily)

# One data frame per archive
data_aqi_daily <- vector("list", length(dir_daily))
for (i in seq_along(dir_daily)) {
  zipFileInfo <- unzip(dir_daily[i], list = TRUE)
  # unz() takes exactly one member name, so fail with a clear message if an
  # archive does not hold a single file.
  if (nrow(zipFileInfo) != 1) {
    stop("Expected exactly one file inside ", dir_daily[i], call. = FALSE)
  }
  data_aqi_daily[[i]] <- read.csv(
    unz(dir_daily[i], as.character(zipFileInfo$Name)),
    stringsAsFactors = FALSE
  )
}

# Stack the yearly tables into a single table
data_aqi_daily_comb <- rbindlist(data_aqi_daily)

# Export the combined table in Stata format.
# NOTE(review): the output name has no ".dta" extension -- confirm that this
# is what downstream code expects.
library(readstata13)
save.dta13(data_aqi_daily_comb,
           paste0(wd, "/data/raw/aqs/aqi/", "data_aqi_by_county"))

# Download the files for the pollutant named by `poll` ----------------------
# `poll` and the matching `filenames_<poll>` vector are defined elsewhere; the
# results are stored in the dynamically named `url_<poll>` / `filepath_<poll>`.

# Names of the per-pollutant variables and the per-pollutant folder, built once
poll_url_name <- paste("url", poll, sep = "_")
poll_filepath_name <- paste("filepath", poll, sep = "_")
poll_dir <- paste0("data/raw/aqs/", poll, "/")

# Full download urls for this pollutant
assign(
  poll_url_name,
  paste0(url2, "/", base::basename(get(paste("filenames", poll, sep = "_"))))
)

# Create the folder that stores the downloaded files if it does not exist;
# recursive = TRUE so missing parent folders are created as well.
if (!dir.exists(poll_dir)) {
  dir.create(poll_dir, recursive = TRUE)
}

# Local destination for every url, keeping the remote file name
assign(poll_filepath_name, paste0(poll_dir, base::basename(get(poll_url_name))))

# Download each url to its destination; mode = "wb" keeps binary archives
# intact on every platform.
purrr::walk2(
  .x = get(poll_url_name),
  .y = get(poll_filepath_name),
  .f = function(src, dest) download.file(src, dest, mode = "wb")
)

